library(fredr)
library(tidyverse)
library(forecast)
library(ggplot2)
library(fpp3)
library(tsibble)
library(gtrendsR)
library(tidyquant)
library(PerformanceAnalytics)

# set api key
fredr_set_key('2ce72ebb6c7e053880ec7ad5a950237f')
# gather data
start_date <- "1978-01-01"
end_date <- "2023-11-01"

# UMICH Consumer Sentiment Index: https://fred.stlouisfed.org/series/UMCSENT
consumer_sentiment <- fredr(
  series_id = "UMCSENT",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# Unemployment Rate: https://fred.stlouisfed.org/series/UNRATE
unemployment <- fredr(
  series_id = "UNRATE",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# YOY % Change in Unemployment Rate: https://fred.stlouisfed.org/series/UNRATE
unemployment_change <- fredr(
  series_id = "UNRATE",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# YOY % Change in Real Income: https://fred.stlouisfed.org/series/DSPIC96
income <- fredr(
  series_id = "DSPIC96",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# YOY % Change in CPI: https://fred.stlouisfed.org/series/CPIAUCSL
cpi <- fredr(
  series_id = "CPIAUCSL",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# Avg 30-year Mortgage Rate: https://fred.stlouisfed.org/series/MORTGAGE30US
mortgage <- fredr(
  series_id = "MORTGAGE30US",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# YOY % Change in Mortgage Rate: https://fred.stlouisfed.org/series/MORTGAGE30US
mortgage_change <- fredr(
  series_id = "MORTGAGE30US",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# YOY % Change in Median House Price: https://fred.stlouisfed.org/series/MSPUS
house <- fredr(
  series_id = "MSPUS",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "q", # quarterly
  units = "pc1"
)

# YOY % Change in Personal Consumption: https://fred.stlouisfed.org/series/PCE
consumption <- fredr(
  series_id = "PCE",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "pc1"
)

# NBER Recession Indicator: https://fred.stlouisfed.org/series/USREC
recession_indicator <- fredr(
  series_id = "USREC",
  observation_start = as.Date(start_date),
  observation_end = as.Date(end_date),
  frequency = "m", # monthly
  units = "lin"
)

# YOY % Change in S&P 500 Index: https://finance.yahoo.com/quote/%5EGSPC?p=%5EGSPC
getSymbols("^GSPC", from = "1977-01-01",
           to = end_date,
           warnings = FALSE,
           auto.assign = TRUE,
           src = "yahoo")
## [1] "GSPC"
# YOY % Change in US Dollar Index: 
getSymbols("DX-Y.NYB", from = "1977-01-01",
           to = end_date,
           warnings = FALSE,
           auto.assign = TRUE,
           src = "yahoo")
## [1] "DX-Y.NYB"
# process fred data
process <- function(data, name) {
  data[name] <- data$value 
  
  data %>%
    select(date, all_of(name))
}

consumer_sentiment_modified <- process(consumer_sentiment, 'consumer_sentiment')
unemployment_modified <- process(unemployment, 'unemployment_rate')
unemployment_change_modified <- process(unemployment_change, 'unemployment_rate_change')
income_modified <- process(income, 'income')
cpi_modified <- process(cpi, 'cpi')
mortgage_modified <- process(mortgage, 'mortgage')
mortgage_change_modified <- process(mortgage_change, 'mortgage_change')
house_modified <- process(house, 'house')
consumption_modified <- process(consumption, 'consumption')
recession_indicator_modified <- process(recession_indicator, 'recession_indicator')

# breakout quarters to months for house data
house_modified <- house_modified %>%
  complete(date = seq.Date(min(date), max(date), by = "month")) %>%
  fill(house)

# process sp500 data
sp500_modified <- as.data.frame(GSPC) %>%
    rownames_to_column('date_day') %>%
    select(date_day, GSPC.Close) %>%
    mutate(date = as.Date(paste(substr(date_day, start = 1, stop = 7), "-01", sep=''))) %>%
    group_by(date) %>%
    summarize(avg_close = mean(GSPC.Close)) %>%
    mutate(sp500 = ((avg_close - lag(avg_close, 12)) / lag(avg_close, 12)) * 100) %>%
    select(-avg_close) %>%
    filter(date >= start_date)

# process dollar_index data
dollar_index_modified <- as.data.frame(`DX-Y.NYB`) %>%
    rownames_to_column('date_day') %>%
    select(date_day, `DX-Y.NYB.Close`) %>%
    filter(!is.na(`DX-Y.NYB.Close`)) %>%
    mutate(date = as.Date(paste(substr(date_day, start = 1, stop = 7), "-01", sep=''))) %>%
    group_by(date) %>%
    summarize(avg_close = mean(`DX-Y.NYB.Close`)) %>%
    mutate(dollar_index = ((avg_close - lag(avg_close, 12)) / lag(avg_close, 12)) * 100) %>%
    select(-avg_close) %>%
    filter(date >= start_date)
# join data

join <- consumer_sentiment_modified %>%
    left_join(unemployment_modified, by = c('date' = 'date')) %>%
    left_join(unemployment_change_modified, by = c('date' = 'date')) %>%
    left_join(income_modified, by = c('date' = 'date')) %>%
    left_join(cpi_modified, by = c('date' = 'date')) %>%
    left_join(mortgage_modified, by = c('date' = 'date')) %>%
    left_join(mortgage_change_modified, by = c('date' = 'date')) %>%
    left_join(house_modified, by = c('date' = 'date')) %>%
    left_join(consumption_modified, by = c('date' = 'date')) %>%
    left_join(sp500_modified, by = c('date' = 'date')) %>%
    left_join(dollar_index_modified, by = c('date' = 'date')) %>%
    left_join(recession_indicator_modified, by = c('date' = 'date'))

head(join)
## # A tibble: 6 × 13
##   date       consumer_sentiment unemployment_rate unemployment_rate_cha…¹ income
##   <date>                  <dbl>             <dbl>                   <dbl>  <dbl>
## 1 1978-01-01               83.7               6.4                   -14.7   4.28
## 2 1978-02-01               84.3               6.3                   -17.1   5.84
## 3 1978-03-01               78.8               6.3                   -14.9   5.24
## 4 1978-04-01               81.6               6.1                   -15.3   5.45
## 5 1978-05-01               82.9               6                     -14.3   5.36
## 6 1978-06-01               80                 5.9                   -18.1   4.93
## # ℹ abbreviated name: ¹​unemployment_rate_change
## # ℹ 8 more variables: cpi <dbl>, mortgage <dbl>, mortgage_change <dbl>,
## #   house <dbl>, consumption <dbl>, sp500 <dbl>, dollar_index <dbl>,
## #   recession_indicator <dbl>
# processing for modeling
final <- join[rowSums(is.na(join)) == 0,]

# visualize
final_pivoted <- final %>%
  mutate(`UMICH Consumer Sentiment` = consumer_sentiment,
         `Unemployment Rate` = unemployment_rate, 
         `YOY % Change in Unemployment Rate` = unemployment_rate_change,
         `YOY % Change in Median Real Disposable Income` = income, 
         `YOY % Change in Personal Consumption` = consumption,
         `YOY % Change in CPI` = cpi, 
         `YOY % Change in S&P 500` = sp500, 
         `YOY % Change in Dollar Index` = dollar_index,
         `Avg 30-year Mortgage Rate` = mortgage,
         `YOY % Change in Mortgage Rate` = mortgage_change,
         `YOY % Change in Median House Price` = house,
         `NBER Recession Indicator` = recession_indicator) %>%
  select(date, contains(" ")) %>%
  pivot_longer(cols = -c(date), 
               names_to = 'variable', values_to = 'value')

visual1 <- final_pivoted %>%
  filter(variable %in% c("UMICH Consumer Sentiment", 
                         "Unemployment Rate", 
                         "YOY % Change in Unemployment Rate", 
                         "YOY % Change in Median Real Disposable Income",
                         "YOY % Change in Personal Consumption",
                         "YOY % Change in CPI"))

visual2 <- final_pivoted %>%
  filter(variable %in% c("YOY % Change in S&P 500", 
                         "YOY % Change in Dollar Index", 
                         "Avg 30-year Mortgage Rate", 
                         "YOY % Change in Mortgage Rate",
                         "YOY % Change in Median House Price",
                         "NBER Recession Indicator"))

# create visuals
ggplot(visual1, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(x = "", y = "Value", color = 'Variable', title = "Economic Indicators Over Time", caption = 'Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy') +
  theme_minimal() +
  theme(legend.position = "none", plot.caption = element_text(size = 7, hjust=0)) +
  facet_wrap(~variable, scales = "free_y", ncol = 2)

ggplot(visual2, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(x = "", y = "Value", color = 'Variable', title = "Economic Indicators Over Time", caption = 'Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy') +
  theme_minimal() +
  theme(legend.position = "none", plot.caption = element_text(size = 7, hjust=0)) +
  facet_wrap(~variable, scales = "free_y", ncol = 2)

# eda

# correlation matrix
cor_matrix <- cor(final %>% select(-date))
chart.Correlation(cor_matrix)

# multivariate forecast: https://stackoverflow.com/questions/70175496/how-to-plot-my-multivariable-regression-time-series-model-in-r

train <- as_tsibble(final) %>%
    filter(date < '2017-01-01')
## Using `date` as index variable.
test <- as_tsibble(final) %>%
    filter(date >= '1978-01-01') 
## Using `date` as index variable.
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment + season() + trend()))

check <- lm(consumer_sentiment ~ ., data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ ., data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.212  -3.576   0.277   3.677  15.836 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               1.054e+02  3.941e+00  26.741  < 2e-16 ***
## date                      1.094e-05  1.919e-04   0.057 0.954569    
## unemployment_rate        -4.665e+00  1.918e-01 -24.319  < 2e-16 ***
## unemployment_rate_change  1.038e-02  2.469e-02   0.420 0.674537    
## income                    6.078e-01  1.918e-01   3.168 0.001637 ** 
## cpi                      -3.500e+00  1.922e-01 -18.213  < 2e-16 ***
## mortgage                  1.685e+00  2.375e-01   7.095 4.98e-12 ***
## mortgage_change           6.509e-03  2.664e-02   0.244 0.807117    
## house                     2.319e-01  6.377e-02   3.637 0.000307 ***
## consumption               8.061e-01  2.393e-01   3.369 0.000819 ***
## sp500                     1.374e-01  1.925e-02   7.139 3.74e-12 ***
## dollar_index              8.232e-02  2.877e-02   2.862 0.004410 ** 
## recession_indicator      -5.015e+00  1.121e+00  -4.475 9.66e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.274 on 455 degrees of freedom
## Multiple R-squared:  0.8327, Adjusted R-squared:  0.8283 
## F-statistic: 188.7 on 12 and 455 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 16
## 
## data:  Residuals
## LM test = 226.97, df = 16, p-value < 2.2e-16
forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment + season() +
##   trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(as_tsibble(final)) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators from 1978-2016") +
    ggtitle("Expected vs Actual Consumer Sentiment from 1978 to 2023") +
    labs(caption = "Note: Economic indicators used in estimating consumer sentiment include unemployement rate,YOY % change in unemployment rate, 
YOY % change in median real disposable income, YOY % change in personal consumption, YOY % change in consumer price index, 
YOY % change in S&P 500 index, YOY % change in dollar index, Avg 30-year mortgage rate, YOY % change in mortgage rate, 
YOY % change in median house price, and NBER recession indicator. Training data is monthly from Jan 1978 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '5 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# narrow date range

start_date <- '2004-01-01'

plot_range <- as_tsibble(final) %>%
  filter(date >= start_date)
## Using `date` as index variable.
test <- as_tsibble(final) %>%
    filter(date >= start_date) 
## Using `date` as index variable.
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment + season() + trend()))

forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment + season() +
##   trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(plot_range) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators from 1978-2016") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Economic indicators used in estimating consumer sentiment include unemployement rate,YOY % change in unemployment rate, 
YOY % change in median real disposable income, YOY % change in personal consumption, YOY % change in consumer price index, 
YOY % change in S&P 500 index, YOY % change in dollar index, Avg 30-year mortgage rate, YOY % change in mortgage rate, 
YOY % change in median house price, and NBER recession indicator. Training data is monthly from Jan 1978 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))

# adding google trends
# https://trends.google.com/trends/explore?cat=16&date=all&geo=US&q=recession&hl=en
# https://trends.google.com/trends/explore?cat=16&date=all&geo=US&q=inflation&hl=en
# https://trends.google.com/trends/explore?cat=16&date=all&geo=US&q=prices&hl=en
# news category is represented with 16, all categories is 0


# search1 <- gtrends(c("inflation"), category = 16, time= "all", geo = "US")$interest_over_time %>%
#   select(date, keyword, hits)
# 
# search2 <- gtrends(c("recession"), category = 16, time= "all", geo = "US")$interest_over_time %>%
#   select(date, keyword, hits)
# 
# search3 <- gtrends(c("prices"), category = 16, time= "all", geo = "US")$interest_over_time %>%
#   select(date, keyword, hits)
# 
# search <- union(search1, union(search2, search3))

search <- read.csv("GTrends Exported - All.csv") %>%
    mutate(date = as.Date(date))

search_pivoted <- search %>%
  pivot_wider(names_from = keyword, values_from = hits) %>%
  select(date, `inflation`, recession, prices)

p <- ggplot() + 
  geom_line(data = search, aes(x = date, y = hits, col = keyword)) +
  labs(x = "", y = "Interest over time", subtitle="Among All Categories in US, Normalized 0 to 100, Monthly", color = 'Keyword') +
  ggtitle("Google Search Trend for Keywords from 2004 to 2023") +
  labs(legend = 'Keyword', caption = "Source: Google | Visual by Michael Dunphy, @mtdunphy") +
  scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
  theme_minimal() +
  theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))

p + facet_wrap(~keyword, scales = "free_y", ncol = 1)

# combine with economic indicators
combined <- final %>%
  filter(date >= '2004-01-01') %>%
  left_join(search_pivoted, by = c('date' = 'date'))

# correlation matrix
cor_matrix <- cor(combined %>% select(-date))
chart.Correlation(cor_matrix)

train <- as_tsibble(combined) %>%
    filter(date < '2017-01-01')
## Using `date` as index variable.
test <- as_tsibble(combined)
## Using `date` as index variable.
# testing with recession keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment - prices - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - prices - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - prices - inflation, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3793  -2.7671   0.2592   2.9507  11.3334 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              140.567464  26.869781   5.231 5.91e-07 ***
## date                      -0.001411   0.001115  -1.266   0.2075    
## unemployment_rate         -4.456883   0.424211 -10.506  < 2e-16 ***
## unemployment_rate_change   0.092761   0.060661   1.529   0.1285    
## income                    -0.110324   0.301316  -0.366   0.7148    
## cpi                       -3.559807   0.594367  -5.989 1.65e-08 ***
## mortgage                  -1.873612   1.736709  -1.079   0.2825    
## mortgage_change           -0.038910   0.053383  -0.729   0.4673    
## house                      0.315680   0.139704   2.260   0.0254 *  
## consumption                1.549435   0.713086   2.173   0.0314 *  
## sp500                      0.132132   0.052522   2.516   0.0130 *  
## dollar_index               0.021832   0.069004   0.316   0.7522    
## recession_indicator       -6.724649   2.531366  -2.657   0.0088 ** 
## recession                 -0.044110   0.082145  -0.537   0.5921    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.696 on 142 degrees of freedom
## Multiple R-squared:  0.8459, Adjusted R-squared:  0.8318 
## F-statistic: 59.96 on 13 and 142 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals
## LM test = 52.751, df = 17, p-value = 1.564e-05
forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment - prices -
##   inflation + season() + trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Recession' among All Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'recession' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with prices keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment - recession - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - recession - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - recession - inflation, 
##     data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.381  -2.662   0.129   3.095  11.278 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              138.084819  26.396869   5.231 5.92e-07 ***
## date                      -0.001279   0.001106  -1.157  0.24920    
## unemployment_rate         -4.475778   0.422658 -10.590  < 2e-16 ***
## unemployment_rate_change   0.090013   0.060384   1.491  0.13826    
## income                    -0.116889   0.301225  -0.388  0.69856    
## cpi                       -3.557611   0.599192  -5.937 2.12e-08 ***
## mortgage                  -1.663660   1.700098  -0.979  0.32946    
## mortgage_change           -0.042308   0.053283  -0.794  0.42851    
## house                      0.349568   0.136878   2.554  0.01171 *  
## consumption                1.592867   0.712739   2.235  0.02699 *  
## sp500                      0.127075   0.053858   2.359  0.01966 *  
## dollar_index               0.021687   0.069063   0.314  0.75397    
## recession_indicator       -7.309915   2.206584  -3.313  0.00117 ** 
## prices                    -0.018055   0.056703  -0.318  0.75064    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.699 on 142 degrees of freedom
## Multiple R-squared:  0.8457, Adjusted R-squared:  0.8316 
## F-statistic: 59.87 on 13 and 142 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals
## LM test = 52.218, df = 17, p-value = 1.9e-05
forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment - recession -
##   inflation + season() + trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Prices' among All Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'prices' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with inflation keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment - prices - recession + season() + trend()))

check <- lm(consumer_sentiment ~ . - prices - recession, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - prices - recession, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2435  -2.7193   0.1856   2.9775  11.1544 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              139.736907  27.208804   5.136 9.10e-07 ***
## date                      -0.001361   0.001112  -1.224  0.22303    
## unemployment_rate         -4.502208   0.430178 -10.466  < 2e-16 ***
## unemployment_rate_change   0.093466   0.062128   1.504  0.13469    
## income                    -0.111312   0.303344  -0.367  0.71420    
## cpi                       -3.584278   0.592903  -6.045 1.25e-08 ***
## mortgage                  -1.768764   1.726863  -1.024  0.30745    
## mortgage_change           -0.037863   0.054576  -0.694  0.48896    
## house                      0.338239   0.132872   2.546  0.01198 *  
## consumption                1.620768   0.725416   2.234  0.02703 *  
## sp500                      0.130653   0.052515   2.488  0.01401 *  
## dollar_index               0.022791   0.069083   0.330  0.74196    
## recession_indicator       -7.287217   2.224680  -3.276  0.00132 ** 
## inflation                 -0.038588   0.131022  -0.295  0.76879    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.699 on 142 degrees of freedom
## Multiple R-squared:  0.8457, Adjusted R-squared:  0.8316 
## F-statistic: 59.86 on 13 and 142 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals
## LM test = 52.981, df = 17, p-value = 1.439e-05
forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment - prices -
##   recession + season() + trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Inflation' among All Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'inflation' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

search <- read.csv("GTrends Exported - News.csv") %>%
    mutate(date = as.Date(date))

search_pivoted <- search %>%
  pivot_wider(names_from = keyword, values_from = hits) %>%
  select(date, `inflation`, recession, prices)

p <- ggplot() + 
  geom_line(data = search, aes(x = date, y = hits, col = keyword)) +
  labs(x = "", y = "Interest over time", subtitle="Among News Categories, Monthly, Normalized 0 to 100", color = 'Keyword') +
  ggtitle("Google Search Trend for Keywords from 2004 to 2023") +
  labs(legend = 'Keyword', caption = "Source: Google | Visual by Michael Dunphy, @mtdunphy") +
  scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
  theme_minimal() +
  theme(plot.caption = element_text(size = 7, hjust=0))

p + facet_wrap(~keyword, scales = "free_y", ncol = 1)

# combine with economic indicators
combined <- final %>%
  filter(date >= '2004-01-01') %>%
  left_join(search_pivoted, by = c('date' = 'date'))

# correlation matrix
cor_matrix <- cor(combined %>% select(-date))
chart.Correlation(cor_matrix)

train <- as_tsibble(combined) %>%
    filter(date < '2017-01-01')
## Using `date` as index variable.
test <- as_tsibble(combined)
## Using `date` as index variable.
# testing with recession keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment - prices - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - prices - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - prices - inflation, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7807  -2.7984   0.1272   2.8439  12.5937 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              147.425713  26.938292   5.473 1.95e-07 ***
## date                      -0.001685   0.001119  -1.506   0.1344    
## unemployment_rate         -4.473129   0.419349 -10.667  < 2e-16 ***
## unemployment_rate_change   0.094064   0.059941   1.569   0.1188    
## income                    -0.091268   0.298973  -0.305   0.7606    
## cpi                       -3.558934   0.588578  -6.047 1.24e-08 ***
## mortgage                  -2.282685   1.732766  -1.317   0.1898    
## mortgage_change           -0.034155   0.053021  -0.644   0.5205    
## house                      0.263163   0.140986   1.867   0.0640 .  
## consumption                1.526555   0.706816   2.160   0.0325 *  
## sp500                      0.133651   0.052136   2.564   0.0114 *  
## dollar_index               0.012570   0.068809   0.183   0.8553    
## recession_indicator       -5.622997   2.466085  -2.280   0.0241 *  
## recession                 -0.096116   0.063334  -1.518   0.1313    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.663 on 142 degrees of freedom
## Multiple R-squared:  0.8481, Adjusted R-squared:  0.8341 
## F-statistic: 60.96 on 13 and 142 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals
## LM test = 51.964, df = 17, p-value = 2.082e-05
forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment - prices -
##   inflation + season() + trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(combined) + 
      labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Recession' among News Categories") +
      ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
      labs(caption = "Note: Google search trend is for keyword 'recession' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with prices keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment - recession - `inflation` + season() + trend()))

check <- lm(consumer_sentiment ~ . - consumer_sentiment - recession - `inflation`, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - consumer_sentiment - recession - 
##     inflation, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3417  -2.6824   0.1424   2.9988  11.2508 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              137.791626  26.388907   5.222 6.18e-07 ***
## date                      -0.001310   0.001102  -1.189  0.23642    
## unemployment_rate         -4.479901   0.423156 -10.587  < 2e-16 ***
## unemployment_rate_change   0.089571   0.060724   1.475  0.14242    
## income                    -0.120856   0.302790  -0.399  0.69039    
## cpi                       -3.578623   0.600333  -5.961 1.89e-08 ***
## mortgage                  -1.674435   1.701045  -0.984  0.32661    
## mortgage_change           -0.041483   0.053235  -0.779  0.43713    
## house                      0.340626   0.134692   2.529  0.01253 *  
## consumption                1.581596   0.712550   2.220  0.02803 *  
## sp500                      0.130241   0.053314   2.443  0.01580 *  
## dollar_index               0.022424   0.069168   0.324  0.74627    
## recession_indicator       -7.371562   2.267236  -3.251  0.00143 ** 
## prices                    -0.003558   0.049867  -0.071  0.94322    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.7 on 142 degrees of freedom
## Multiple R-squared:  0.8456, Adjusted R-squared:  0.8315 
## F-statistic: 59.82 on 13 and 142 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals
## LM test = 52.637, df = 17, p-value = 1.631e-05
forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment - recession -
##   inflation + season() + trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(combined) + 
      labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Prices' among News Categories") +
      ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
      labs(caption = "Note: Google search trend is for keyword 'prices' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.

# testing with inflation keyword
fit <- train %>% 
    model(TSLM(consumer_sentiment ~ . - consumer_sentiment - prices - recession + season() + trend()))

check <- lm(consumer_sentiment ~ . - consumer_sentiment - prices - recession, data = train)
summary(check)
## 
## Call:
## lm(formula = consumer_sentiment ~ . - consumer_sentiment - prices - 
##     recession, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1852  -2.7896   0.3136   2.9462  11.1106 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              143.810207  27.666889   5.198 6.88e-07 ***
## date                      -0.001570   0.001156  -1.358  0.17657    
## unemployment_rate         -4.528533   0.427751 -10.587  < 2e-16 ***
## unemployment_rate_change   0.100102   0.062177   1.610  0.10963    
## income                    -0.103394   0.301405  -0.343  0.73208    
## cpi                       -3.594333   0.592151  -6.070 1.11e-08 ***
## mortgage                  -1.854360   1.714803  -1.081  0.28136    
## mortgage_change           -0.034501   0.054016  -0.639  0.52404    
## house                      0.332845   0.132932   2.504  0.01342 *  
## consumption                1.700369   0.730475   2.328  0.02134 *  
## sp500                      0.128072   0.052580   2.436  0.01610 *  
## dollar_index               0.019376   0.069058   0.281  0.77945    
## recession_indicator       -6.962832   2.268964  -3.069  0.00258 ** 
## inflation                 -0.078782   0.110447  -0.713  0.47683    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.692 on 142 degrees of freedom
## Multiple R-squared:  0.8461, Adjusted R-squared:  0.8321 
## F-statistic: 60.07 on 13 and 142 DF,  p-value: < 2.2e-16
checkresiduals(check)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals
## LM test = 52.248, df = 17, p-value = 1.879e-05
forecast_consumption <- forecast(fit, new_data = test)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `TSLM(consumer_sentiment ~ . - consumer_sentiment - prices -
##   recession + season() + trend()) = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
forecast_consumption %>% 
  autoplot(combined) + 
    labs(level = 'Level', x = "", y = "UMICH Consumer Sentiment", subtitle="Based on Economic Indicators + Google Search Trend: 'Inflation' among News Categories") +
    ggtitle("Expected vs Actual Consumer Sentiment from 2004 to 2023") +
    labs(caption = "Note: Google search trend is for keyword 'inflation' among all categories in the US. Economic indicators used in estimating consumer 
sentiment include unemployement rate,YOY % change in unemployment rate, YOY % change in median real disposable income, YOY % 
change in personal consumption, YOY % change in consumer price index, YOY % change in S&P 500 index, YOY % change in dollar 
index, Avg 30-year mortgage rate, YOY % change in mortgage rate, YOY % change in median house price, and NBER recession indicator. 
Training data is monthly from Jan 2004 to Jan 2017.
          
Source: Federal Reserve Bank of St Louis (FRED), Yahoo Finance, Google | Visual by Michael Dunphy, @mtdunphy") +
          scale_x_date(date_breaks = '2 year', date_labels = "%Y") +
          theme_bw() +
          theme(plot.caption = element_text(size = 7, hjust=0), plot.subtitle = element_text(size = 10, hjust=0))
## Using `date` as index variable.